
# ------------------------------------------------------------------------------
#
# Code for MLSC Occupation-Cognition Study
# Prepared Spring, 2024
#
# Load workspace S1.RData by double-clicking on it or opening it from within
# R's native console or your preferred R IDE
#
# The following code reproduces the tables for the manuscript
#
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
# Load libraries
# ------------------------------------------------------------------------------

library(lavaan)


# ------------------------------------------------------------------------------
# SEM estimation of factor scores for COG.i, COG.s
# ------------------------------------------------------------------------------

# both processes
m.COG.ils   = 'fCOG.i   =~ fsMVIS.i  + fsSPD.i  + fsRSN.i
               fCOG.ls  =~ fsMVIS.ls + fsSPD.ls + fsRSN.ls
               fCOG.i   ~~ fCOG.ls
               fsMVIS.i ~~ fsMVIS.ls
               fsSPD.i  ~~ fsSPD.ls
               fsRSN.i  ~~ fsRSN.ls
              '	
fit.COG.ils = cfa(m.COG.ils,  data = d, missing = "ML")
# non-positive definite, moving on...

# individual processes
m.COG.i     = 'fCOG.i  =~ fsMVIS.i  + fsSPD.i  + fsRSN.i'
m.COG.ls    = 'fCOG.ls =~ fsMVIS.ls + fsSPD.ls + fsRSN.ls'

fit.COG.i  = cfa(m.COG.i,  data = d, missing = "ML")	
fit.COG.ls = cfa(m.COG.ls, data = d, missing = "ML")
summary(fit.COG.i, fit.measures = TRUE, standardized = TRUE)	
summary(fit.COG.ls, fit.measures = TRUE, standardized = TRUE)

d$COG.i  = lavPredict(fit.COG.i)
d$COG.ls = lavPredict(fit.COG.ls)

# also get scaled so we can estimate betas in regression
d$COG.i.scl  = scale(d$COG.i)
d$COG.ls.scl = scale(d$COG.ls)

# Individuals' COG.i and COG.s summary (mean, sd)
mean(d$COG.i);  sd(d$COG.i)
mean(d$COG.ls); sd(d$COG.ls)
var(d$COG.i) / var(d$COG.ls)


# ------------------------------------------------------------------------------
# Get summary statistics for demographics and covariates
# ------------------------------------------------------------------------------

get_perc <- function(n, N, nsml=1)
    {
    paste("{", format(round((n/N)*100,1),nsmall=nsml), "}", sep = '')	
    }

get_smry <- function(dat, tot = NULL)
    {
	if(is.null(tot)) tot = nrow(dat)
	# percent based on total N
	N   = nrow(dat)
	Np  = get_perc(N, tot)
	# percents based on category n
	Fem  = length(which(dat$Female == TRUE))
	Femp = get_perc(Fem, N)
	Mch  = length(which(dat$Manchester == TRUE))
	Mchp = get_perc(Mch, N)
	Edu  = length(which(dat$PSE == TRUE))
    Edup = get_perc(Edu, N)
	Smk  = length(which(dat$Smoker == TRUE))
    Smkp = get_perc(Smk, N)
	Age = round(median(dat$AgeAtEntry))
	Ager = paste("[",
		         round(min(dat$AgeAtEntry, na.rm = T)),
		         ",",
		         round(max(dat$AgeAtEntry, na.rm = T)),
		         "]",
		         sep = ''
	             )    
	PhysAct = round(median(dat$PhysAct, na.rm = T))
	PhysActr = paste("[",
		         format(round(summary(dat$PhysAct)[[2]],1), nsmall=1),
		         ",",
		         format(round(summary(dat$PhysAct)[[5]],1), nsmall=1),
		         "]",
		         sep = ''
	             )    
	Rx = round(median(dat$RxMeds, na.rm = T))
	Rxr = paste("[",
		         format(round(summary(dat$RxMeds)[[2]],1), nsmall=1),
		         ",",
		         format(round(summary(dat$RxMeds)[[5]],1), nsmall=1),
		         "]",
		         sep = ''
	             )      
	Slp = round(median(dat$SlpHrs, na.rm = T))
	Slpr = paste("[",
		         format(round(summary(dat$SlpHrs)[[2]],1), nsmall=1),
		         ",",
		         format(round(summary(dat$SlpHrs)[[5]],1), nsmall=1),
		         "]",
		         sep = ''
	             ) 
	Mry  = length(which(dat$Married == 1))
    Mryp = get_perc(Mry, N)
	Hbi = round(median(dat$Hobbies, na.rm = T))
	Hbir = paste("[",
		         format(round(summary(dat$Hobbies)[[2]],1), nsmall=1),
		         ",",
		         format(round(summary(dat$Hobbies)[[5]],1), nsmall=1),
		         "]",
		         sep = ''
	             )    
	Ret  = length(which(dat$Retired == 1))
	Retp = get_perc(Ret, N)
	
	# cognitive measures
	get_musd <- function(v)
	    {
		mu    = format(round(mean(v, na.rm = T), 2), nsmall = 2)
		sigma = format(round(sd(v, na.rm = T), 2), nsmall = 2)
		paste(mu, " (", sigma, ")", sep = '')
	    }
	PS.i   = get_musd(dat$fsSPD.i)
	PS.ls  = get_musd(dat$fsSPD.ls)
	Gf.i   = get_musd(dat$fsRSN.i)
	Gf.ls  = get_musd(dat$fsRSN.ls)
	VM.i   = get_musd(dat$fsMVIS.i)
	VM.ls  = get_musd(dat$fsMVIS.ls)
	COG.i  = get_musd(dat$COG.i)
	COG.ls = get_musd(dat$COG.ls)
	
    # Output
	c("N"         = N,
	  "N(%)"      = Np,
	  "Age"       = Age, 
	  "[min,max]" = Ager,
	  "Fem"       = Fem, 
	  "Fem(%)"    = Femp,
	  "Mch"       = Mch, 
	  "Mch(%)"    = Mchp,
	  "Edu"       = Edu, 
	  "Edu(%)"    = Edup,
	  "Smk"       = Smk,
	  "Smk(%)"    = Smkp,
	  "PhysAct"   = PhysAct,
	  "[IQR]"     = PhysActr,
	  "Rx"        = Rx,
	  "[IQR]"     = Rxr,
	  "Slp"       = Slp,
	  "[IQR]"     = Slpr,
	  "Mry"       = Mry,
	  "Mry(%)"    = Mryp,
	  "Hob"       = Hbi,
	  "[IQR]"     = Hbir,
	  "Ret"       = Ret, 
	  "Ret(%)"    = Retp,
	  "PS.i"      = PS.i,  
	  "PS.ls"     = PS.ls, 
	  "Gf.i"      = Gf.i,  
	  "Gf.ls"     = Gf.ls, 
	  "VM.i"      = VM.i,  
	  "VM.ls"     = VM.ls, 
	  "COG.i"     = COG.i, 
	  "COG.ls"    = COG.ls	  
	  )
    }

get_smrys <- function(dat, grp, tot=NULL)
    {
	smry = by(dat, dat[,grp], get_smry, tot = tot)
	as.data.frame(do.call("rbind", smry))
    }

get_smrys_bySOC <- function(dat)
    {
    N = nrow(dat)
    smry_tot = as.data.frame(t(get_smry(dat, N)))
    smry_oc1 = get_smrys(dat, "SOC1", N)
    smry_oc2 = get_smrys(dat, "SOC2", N)
    smry_tot$OC = "All"
    smry_oc1$OC = row.names(smry_oc1)
    smry_oc2$OC = row.names(smry_oc2)
    smry = rbind(smry_tot, smry_oc1, smry_oc2)
    row.names(smry) = c(1:nrow(smry))
    smry
    }

# Education specifically
round((table(d$HigherEduType) / nrow(d))*100, 1)
mean(d$AgeLeftEdu, na.rm = T)
sd(d$AgeLeftEdu, na.rm = T)
by(d$AgeLeftEdu, d$SOC1, function(x) 
	round(c(mean(x, na.rm=T), sd(x, na.rm=T)),2)
	)
by(d$AgeLeftEdu, d$HigherEduType, function(x) 
	round(c(mean(x, na.rm=T), sd(x, na.rm=T)),2)
	)
d_NoPSE = d[d$HigherEduType == 'None',]
NoPSE_PercsByAge = (table(d_NoPSE$AgeLeftEdu) / nrow(d_NoPSE))*100
round(sum(NoPSE_PercsByAge[7:10]), 2)  #61.9% 14y, 89.1% 14-16y

	
# Table 1 (participants w/SOC)
smry0.a = get_smrys_bySOC(d)
smry0.a = smry0.a[order(smry0.a$OC), c(33,1:10)]
smry0.a = smry0.a[c(1,5:nrow(smry0.a),2:4),]
smry0.a

# Supplemental Table 1 (participants w/education & lifestyle data)
hl.cov = c("HigherEduType", "AgeLeftEdu", "Smoker", "RxMeds", "PhysAct", 
	       "SlpHrs", "Married", "Hobbies", "Retired")
dc = d[which(complete.cases(d[,hl.cov])),]
nrow(dc)
smry0.b = get_smrys_bySOC(dc)
smry0.b1 = smry0.b[order(smry0.b$OC), c(33,11:12,15:16,13:14,17:18,21:22,19:20)]	
smry0.b1 = smry0.b1[c(1,5:nrow(smry0.b1),2:4),]
smry0.b1

# Supplemental Table 2 (participants w/SOC, cognitive summaries)
smry0.b2 = smry0.b[order(smry0.b$OC), c(33,25:32)]
smry0.b2 = smry0.b2[c(1,5:nrow(smry0.b2),2:4),]
smry0.b2


# ------------------------------------------------------------------------------
# Format data for regression analyses
# ------------------------------------------------------------------------------

# make sure categorical vars specified as factors
d$SOC1 = as.factor(d$SOC1)
d$SOC2 = as.factor(d$SOC2)
d$SOC_Skill = as.factor(d$SOC_Skill)

# set reference levels (choices aid interpretation of 'gaps' between groups)
d$SOC1 = relevel(d$SOC1, ref = "SOC.2")         		  # professional
d$SOC2 = relevel(d$SOC2, ref = "SOC.23")				  # teaching/research
d$SOC_Skill = relevel(d$SOC_Skill, ref = "4")		      # highest
d$HigherEduType = relevel(d$HigherEduType, ref = "University")  

# center covariates
d$AgeAtEntry.ctr = d$AgeAtEntry - 70
d$AgeLeftEdu.ctr = d$AgeLeftEdu - 16
d$RxMeds.ctr = d$RxMeds - mean(d$RxMeds, na.rm = T)
d$PhysAct.ctr = d$PhysAct - mean(d$PhysAct, na.rm = T)
d$SlpHrs.ctr = d$SlpHrs - mean(d$SlpHrs, na.rm = T)
d$Hobbies.ctr = d$Hobbies - mean(d$Hobbies, na.rm = T)

# scaled (for betas)
d$AgeAtEntry.scl   = scale(d$AgeAtEntry)
d$AgeLeftEdu.scl   = scale(d$AgeLeftEdu)
d$RxMeds.scl       = scale(d$RxMeds)
d$PhysAct.scl      = scale(d$PhysAct)
d$SlpHrs.scl       = scale(d$SlpHrs)
d$Hobbies.scl      = scale(d$Hobbies)


# ------------------------------------------------------------------------------
# Specify sets of covariates
# ------------------------------------------------------------------------------

# Predictors
cov.0     = c("1")
cov.a     = c("Female", "AgeAtEntry.ctr", "Manchester")
cov.b     = c(cov.a, "HigherEduType", "AgeLeftEdu.ctr", "Smoker", "RxMeds.ctr", 
	                 "PhysAct.ctr", "SlpHrs.ctr", "Hobbies.ctr", "Married", 
	                 "Retired")
cov.a.scl = c("Female", "AgeAtEntry.scl", "Manchester") 
cov.b.scl = c(cov.a.scl, "HigherEduType", "AgeLeftEdu.scl", "Smoker", 
	                     "RxMeds.scl", "PhysAct.scl", "SlpHrs.scl", 
	                     "Hobbies.scl", "Married", "Retired")

cov1.0 = c("SOC1",         cov.0) 
cov2.0 = c("SOC2",         cov.0)
cov3.0 = c("SOC_Skill",    cov.0) 
cov1.a = c("SOC1",         cov.a) 
cov2.a = c("SOC2",         cov.a) 
cov3.a = c("SOC_Skill",    cov.a) 
cov1.b = c("SOC1",         cov.b) 
cov2.b = c("SOC2",         cov.b)

cov1.a.scl = c("SOC1",         cov.a.scl) 
cov2.a.scl = c("SOC2",         cov.a.scl)
cov3.a.scl = c("SOC_Skill",    cov.a.scl)
cov1.b.scl = c("SOC1",         cov.b.scl) 
cov2.b.scl = c("SOC2",         cov.b.scl)

# function to make regression formula
mk_frm <- function(DV, IVs)
    {
	paste(DV, paste(IVs, collapse = " + "), sep = " ~ ")
    }


# ------------------------------------------------------------------------------
# Select data sets by covariates for valid tests of change in Rsq
# ------------------------------------------------------------------------------

# d = full data set (all participants w/SOC)  (N = 5,542)
nrow(d) # 5,542

# d.cr = participants w/education and hobbies (N = 5,377)
d.cpr = d[complete.cases(d[,c("HigherEduType", "AgeLeftEdu", "Hobbies"),]),]
nrow(d.cpr)

# d.cov = participants w/education and lifestyle covariates (N = 5,161)
d.cov = d[complete.cases(d[,cov.b]),]


# ------------------------------------------------------------------------------
# Regression: SOC1 & SOC2
# ------------------------------------------------------------------------------

## SOC 1 (major categories)
lm.COGi.1  <- lm(as.formula(mk_frm("COG.i", cov1.0)), data = d)
lm.COGi.1a <- lm(as.formula(mk_frm("COG.i", cov1.a)), data = d)
lm.COGi.1b <- lm(as.formula(mk_frm("COG.i", cov1.b)), data = d.cov)
lm.COGi.1a.scl <- lm(as.formula(mk_frm("COG.i.scl", cov1.a.scl)), data = d)
lm.COGi.1b.scl <- lm(as.formula(mk_frm("COG.i.scl", cov1.b.scl)), data = d.cov)
lm.COGls.1 <- lm(as.formula(mk_frm("COG.ls", cov1.0)), data = d)
lm.COGls.1a <- lm(as.formula(mk_frm("COG.ls", cov1.a)), data = d)
lm.COGls.1b <- lm(as.formula(mk_frm("COG.ls", cov1.b)), data = d.cov)
lm.COGls.1a.scl <- lm(as.formula(mk_frm("COG.ls.scl", cov1.a.scl)), data = d)
lm.COGls.1b.scl <- lm(as.formula(mk_frm("COG.ls.scl",cov1.b.scl)), data = d.cov)

## SOC 2 (sub-major categories: focal analyses)
lm.COGi.2  <- lm(as.formula(mk_frm("COG.i", cov2.0)), data = d)
lm.COGi.2a <- lm(as.formula(mk_frm("COG.i", cov2.a)), data = d)
lm.COGi.2b <- lm(as.formula(mk_frm("COG.i", cov2.b)), data = d.cov)
lm.COGi.2a.scl <- lm(as.formula(mk_frm("COG.i.scl", cov2.a.scl)), data = d)
lm.COGi.2b.scl <- lm(as.formula(mk_frm("COG.i.scl", cov2.b.scl)), data = d.cov)
lm.COGls.2 <- lm(as.formula(mk_frm("COG.ls", cov2.0)), data = d)
lm.COGls.2a <- lm(as.formula(mk_frm("COG.ls", cov2.a)), data = d)
lm.COGls.2b <- lm(as.formula(mk_frm("COG.ls", cov2.b)), data = d.cov)
lm.COGls.2a.scl <- lm(as.formula(mk_frm("COG.ls.scl", cov2.a.scl)), data = d)
lm.COGls.2b.scl <- lm(as.formula(mk_frm("COG.ls.scl",cov2.b.scl)), data = d.cov)

### R-square
get_rsq <- function(fit) round(summary(fit)$r.squared, 3)

## w/out education, health, and lifestyle covariates (N=5,542)
get_rsq(lm.COGi.1)		# .173
get_rsq(lm.COGi.1a)		# .265
get_rsq(lm.COGls.1)		# .034
get_rsq(lm.COGls.1a)	# .046
get_rsq(lm.COGi.2)		# .186
get_rsq(lm.COGi.2a)		# .275
get_rsq(lm.COGls.2)		# .039
get_rsq(lm.COGls.2a)	# .051

## with education, health, and lifestyle covariates (N=5,161)
get_rsq(lm.COGi.1b)		# .332	
get_rsq(lm.COGls.1b)	# .072	
get_rsq(lm.COGi.2b)		# .335	
get_rsq(lm.COGls.2b)	# .075	

## predictive effects of lifestyle covariates
get_rsq(update(lm.COGi.2a, . ~ ., data = d.cov))   # .266 vs. .335  (6.9%)                 
get_rsq(update(lm.COGls.2a, . ~ ., data = d.cov))  # .053 vs. .075  (2.2%)
rem.covs = cov2.b[-c(1:6,11)]
frm.up = paste('. ~ . -', paste(rem.covs, collapse = ' - '))
get_rsq(update(lm.COGi.2b, as.formula(frm.up)))     # .326 vs. .335 (0.9%)                  
get_rsq(update(lm.COGls.2b, as.formula(frm.up)))    # .069 vs. .075 (0.7%) 


# ------------------------------------------------------------------------------
# Follow-up to compare Rsq w/SOC2, edu, hobbies, & combined
# ------------------------------------------------------------------------------

lm.COGi.4a  <- lm(as.formula(mk_frm("COG.i",  "HigherEduType")), data = d.cpr)
lm.COGls.4a <- lm(as.formula(mk_frm("COG.ls", "HigherEduType")), data = d.cpr)
lm.COGi.4b  <- lm(as.formula(mk_frm("COG.i",  "AgeLeftEdu")), data = d.cpr)
lm.COGls.4b <- lm(as.formula(mk_frm("COG.ls", "AgeLeftEdu")), data = d.cpr)
lm.COGi.4c  <- lm(as.formula(mk_frm("COG.i", c("AgeLeftEdu", "HigherEduType"))), 
	                         data = d.cpr)
lm.COGls.4c <- lm(as.formula(mk_frm("COG.ls",c("AgeLeftEdu", "HigherEduType"))), 
	                         data = d.cpr)
lm.COGi.4d  <- lm(as.formula(mk_frm("COG.i",  "Hobbies")), data = d.cpr)
lm.COGls.4d <- lm(as.formula(mk_frm("COG.ls", "Hobbies")), data = d.cpr)

# hierarchical change Rsq
lm.COGi.4e  <- update(lm.COGi.4c, . ~ . + SOC2)
lm.COGi.4f  <- update(lm.COGi.4e, . ~ . + Hobbies)
lm.COGls.4e <- update(lm.COGls.4c, . ~ . + SOC2)
lm.COGls.4f <- update(lm.COGls.4e, . ~ . + Hobbies)

# COG.i
get_rsq(lm.COGi.2)			# .186  SOC2
get_rsq(lm.COGi.4a) 		# .160  Higher Edu Type
get_rsq(lm.COGi.4b)			# .071	Age left Edu
get_rsq(lm.COGi.4c)			# .171  Higher Edu + Age left Edu
get_rsq(lm.COGi.4d)			# .085  Hobbies	
get_rsq(lm.COGi.4e)			# .230  Edu + SOC2
get_rsq(lm.COGi.4f)			# .257  Edu + + SOC2 + Hobbies

# COG.ls
get_rsq(lm.COGls.2)			# .039  SOC2
get_rsq(lm.COGls.4a) 		# .024  Higher Edu Type
get_rsq(lm.COGls.4b)		# .013	Age left Edu
get_rsq(lm.COGls.4c)		# .027  Higher Edu + Age left Edu
get_rsq(lm.COGls.4d)		# .025  Hobbies	
get_rsq(lm.COGls.4e)		# .045  Edu + SOC2
get_rsq(lm.COGls.4f)		# .057  Edu + SOC2 + Hobbies


# ------------------------------------------------------------------------------
# Moderation models skill * hobbies
# ------------------------------------------------------------------------------

## Skill level (cuts across some SOC occupation groups) - use all dat here
lm.COGi.4  <- lm(as.formula(mk_frm("COG.i", cov3.0)), data = d)
lm.COGi.4a <- lm(as.formula(mk_frm("COG.i", cov3.a)), data = d)
lm.COGls.4 <- lm(as.formula(mk_frm("COG.ls", cov3.0)), data = d)
lm.COGls.4a <- lm(as.formula(mk_frm("COG.ls", cov3.a)), data = d)

## Hobbies: higher functioning professions tended to list more hobbies 
cov3.a1 = c(cov3.a, c("HigherEduType", "AgeLeftEdu.ctr", "Hobbies.ctr")) 
cov3.a2 = c(cov3.a1, c("Hobbies.ctr*SOC_Skill")) 
cov3.a1.scl = c(cov3.a.scl, c("HigherEduType", "AgeLeftEdu.scl", "Hobbies.scl")) 
cov3.a2.scl = c(cov3.a1.scl, c("Hobbies.scl*SOC_Skill")) 
lm.COGi.4a1  <- lm(as.formula(mk_frm("COG.i",  cov3.a1)), data = d.cpr)
lm.COGls.4a1 <- lm(as.formula(mk_frm("COG.ls", cov3.a1)), data = d.cpr)
lm.COGi.4a2  <- lm(as.formula(mk_frm("COG.i",  cov3.a2)), data = d.cpr)
lm.COGls.4a2 <- lm(as.formula(mk_frm("COG.ls", cov3.a2)), data = d.cpr)
anova(lm.COGi.4a1, lm.COGi.4a2)   # sig
anova(lm.COGls.4a1,lm.COGls.4a2)  # non-sig
lm.COGi.4a2.scl  <- lm(as.formula(mk_frm("COG.i.scl", cov3.a2.scl)), data = d.cpr)
lm.COGls.4a1.scl <- lm(as.formula(mk_frm("COG.ls.scl", cov3.a1.scl)), data = d.cpr)

## R-square
# full data, no interaction w/hobbies
get_rsq(lm.COGi.4)        # .121
get_rsq(lm.COGi.4a)       # .219
get_rsq(lm.COGls.4)	      # .017
get_rsq(lm.COGls.4a)	  # .033
# cpr data, interaction w/hobbies
get_rsq(lm.COGi.4a2)      # .310
get_rsq(lm.COGls.4a1)	  # .059


# ------------------------------------------------------------------------------
# Extrapolation using population weights for SOC1 categories (1991, 2021)
# ------------------------------------------------------------------------------

# set up data set for sensitivity analysis
d.sens = d

# remove unemployed & homemakers
d.sens <- d[which(d$SOC1!= "SOC.0"),]  
d.sens$SOC1 = droplevels(d.sens$SOC1)
nrow(d.sens) # n = 5,067

# order SOC1 levels for consistency w/weighting percentages
levels(d.sens$SOC1)
d.sens$SOC1 = relevel(d.sens$SOC1, ref = "SOC.1")
SOC1s = levels(d.sens$SOC1)
SOC1s

# split on sex
d.sens.m = d.sens[d.sens$Female == FALSE,]
d.sens.f = d.sens[d.sens$Female == TRUE,]

# get percent major occupational class by sex
smp.m = table(d.sens.m$SOC1)/nrow(d.sens.m)*100
smp.f = table(d.sens.f$SOC1)/nrow(d.sens.m)*100

# 1991 UK census
pop.m.1991 = c(16.50,  9.75, 12.00,  5.00, 25.00,  1.75,  2.50, 15.00, 12.50)
pop.f.1991 = c( 8.00,  7.50, 11.50, 28.00,  4.00, 10.00, 10.00,  5.00, 16.00)
wts.m.1991 = pop.m.1991 / smp.m
wts.f.1991 = pop.f.1991 / smp.f

# 2021 UK census
pop.m.2021 = c(15.61, 18.44, 12.60,  4.19, 17.63,  3.51, 5.52, 11.68, 10.83) 
pop.f.2021 = c( 9.70, 22.08, 13.81, 14.79,  2.26, 15.71, 9.68,  1.89, 10.08) 
wts.m.2021 = pop.m.2021 / smp.m
wts.f.2021 = pop.f.2021 / smp.f


# add weights
d.sens.m$SOCwt.1991 = sapply(d.sens.m$SOC1, function(x) 
                          {
	                      wts.m.1991[which(x == SOC1s)]
                          })
d.sens.f$SOCwt.1991 = sapply(d.sens.f$SOC1, function(x) 
                          {
	                      wts.f.1991[which(x == SOC1s)]
                          })
d.sens.m$SOCwt.2021 = sapply(d.sens.m$SOC1, function(x) 
                          {
	                      wts.m.2021[which(x == SOC1s)]
                          })
d.sens.f$SOCwt.2021 = sapply(d.sens.f$SOC1, function(x) 
                          {
	                      wts.f.2021[which(x == SOC1s)]
                          })

# normalize weights
d.sens.m$SOCwt.1991 = d.sens.m$SOCwt.1991 / 
                      (sum(d.sens.m$SOCwt.1991) * nrow(d.sens.m))
d.sens.m$SOCwt.2021 = d.sens.m$SOCwt.2021 / 
                      (sum(d.sens.m$SOCwt.2021) * nrow(d.sens.m))
d.sens.f$SOCwt.1991 = d.sens.f$SOCwt.1991 / 
                      (sum(d.sens.f$SOCwt.1991) * nrow(d.sens.f))
d.sens.f$SOCwt.2021 = d.sens.f$SOCwt.2021 / 
                      (sum(d.sens.f$SOCwt.2021) * nrow(d.sens.f))

# check weights
hist(d.sens.m$SOCwt.1991, breaks = 100)
hist(d.sens.m$SOCwt.2021, breaks = 100)
hist(d.sens.f$SOCwt.1991, breaks = 100)
hist(d.sens.f$SOCwt.2021, breaks = 100)

# recombine sexes & check SOC1 & weights
d.sens = rbind(d.sens.m, d.sens.f)
unique(d.sens[d.sens$Female==FALSE,c("SOC1", "SOCwt.1991")])
unique(d.sens[d.sens$Female==FALSE,c("SOC1", "SOCwt.2021")])

# make SOC2 (professional) the reference level again
d.sens$SOC1 = relevel(d.sens$SOC1, ref = "SOC.2")     
levels(d.sens$SOC1)

## Regression: SOC 1 (major categories) - weighted, adj. for demographics only
lm.COGi.1a.wt1991 <- lm(as.formula(mk_frm("COG.i", cov1.a)), data = d.sens, 
	                weights = SOCwt.1991)
lm.COGi.1a.scl.wt1991 <- lm(as.formula(mk_frm("COG.i.scl", cov1.a.scl)), 
	                    data = d.sens, weights = SOCwt.1991)
lm.COGls.1a.wt1991 <- lm(as.formula(mk_frm("COG.ls", cov1.a)), data = d.sens, 
	                 weights = SOCwt.1991)
lm.COGls.1a.scl.wt1991 <- lm(as.formula(mk_frm("COG.ls.scl", cov1.a.scl)), 
	                     data = d.sens, weights = SOCwt.1991)

lm.COGi.1a.wt2021 <- lm(as.formula(mk_frm("COG.i", cov1.a)), data = d.sens, 
	                weights = SOCwt.2021)
lm.COGi.1a.scl.wt2021 <- lm(as.formula(mk_frm("COG.i.scl", cov1.a.scl)), 
	                    data = d.sens, weights = SOCwt.2021)
lm.COGls.1a.wt2021 <- lm(as.formula(mk_frm("COG.ls", cov1.a)), data = d.sens, 
	                 weights = SOCwt.2021)
lm.COGls.1a.scl.wt2021 <- lm(as.formula(mk_frm("COG.ls.scl", cov1.a.scl)), 
	                     data = d.sens, weights = SOCwt.2021)


# ------------------------------------------------------------------------------
# Tables w/Regression Results
# ------------------------------------------------------------------------------

tbl_ests <- function(rslt, nm=NULL, alpha=.05, precis=precis, show.p = FALSE)   
    {
	Z = qnorm(alpha/2)
	rslt = as.data.frame(summary(rslt)$coefficients)
    names(rslt) = c("B", "SE", "t", "p")
	zCI = Z * rslt$SE
	CI_lo = rslt$B - zCI
	CI_hi = rslt$B + zCI
	rslt[,1:3] = round(rslt[,1:3], precis)
	CI = cbind(CI_lo, CI_hi)
	CI_lo = apply(CI,1,min)
	CI_hi = apply(CI,1,max)
	rslt$CI = paste("[", format(round(CI_lo, precis), nsmall=precis), ",",
		                 format(round(CI_hi, precis), nsmall=precis), "]",
		                 sep = ''
	                )
	rslt$prd = row.names(rslt)
	rslt = rslt[,c(6,1,5,3,4)]
	row.names(rslt) = c(1:nrow(rslt))
	rslt$p = if(show.p == TRUE)
	         {
	         round(rslt$p,3)
	         } else { ifelse(rslt$p < alpha, '', 'ns')}

	if(!is.null(nm)) 
		{
		names(rslt)[2:5] = paste(nm, names(rslt)[2:5], sep = '.')
	    }
	rslt
    }

combine_Bbeta <- function(fit.i, fit.i.scl, fit.ls, fit.ls.scl)
    {
    names(fit.i.scl)[1] = "i.Beta"
    smry.i = cbind(fit.i[,1:3], fit.i.scl[,2],fit.i[,5])	
    smry.ls = cbind(fit.ls[,1:3], fit.ls.scl[,2],fit.ls[,5])
    smry = merge(smry.i, smry.ls, by = "prd", all.x = T, all.y = T)
    names(smry) = c("prd", "i.B", "i.CI", "i.Beta", "i.p",
                           "ls.B", "ls.CI", "ls.Beta", "ls.p")
    smry
    }

# define alpha for the CIs
alpha = .005
precis = 2  # change to 5 to check CI0 values (avoid rounding mistakes)

## Table 2 (participants w/SOC)
smry.i.1a      = tbl_ests(lm.COGi.1a,      "i",  alpha, precis)
smry.i.1a.scl  = tbl_ests(lm.COGi.1a.scl,  "i",  alpha, precis)
smry.ls.1a     = tbl_ests(lm.COGls.1a,     "ls", alpha, precis)
smry.ls.1a.scl = tbl_ests(lm.COGls.1a.scl, "ls", alpha, precis)
smry.i.2a      = tbl_ests(lm.COGi.2a,      "i",  alpha, precis)
smry.i.2a.scl  = tbl_ests(lm.COGi.2a.scl,  "i",  alpha, precis)
smry.ls.2a     = tbl_ests(lm.COGls.2a,     "ls", alpha, precis)
smry.ls.2a.scl = tbl_ests(lm.COGls.2a.scl, "ls", alpha, precis)
smry.1a = combine_Bbeta(smry.i.1a, smry.i.1a.scl, smry.ls.1a, smry.ls.1a.scl)
smry.2a = combine_Bbeta(smry.i.2a, smry.i.2a.scl, smry.ls.2a, smry.ls.2a.scl)
smry.1a$prd = gsub("SOC1", '', smry.1a$prd)
smry.2a$prd = gsub("SOC2", '', smry.2a$prd)
smry.1a = smry.1a[-c(2:4),]
smry.1a[1,"prd"] = paste(levels(d$SOC1)[[1]], " [REF1]", sep = '')
smry.2a[1,"prd"] = paste(levels(d$SOC2)[[1]], " [REF2]", sep = '')
smry.a = rbind(smry.1a, smry.2a)
smry.a = smry.a[order(smry.a$prd),]
row.names(smry.a) = c(1:nrow(smry.a))
smry.a = smry.a[c(7:nrow(smry.a),4:6,1:3),]  
smry.a

## Supplemental Table 3 (participants w/lifestyle data)
smry.i.1b      = tbl_ests(lm.COGi.1b,      "i",  alpha, precis)
smry.i.1b.scl  = tbl_ests(lm.COGi.1b.scl,  "i",  alpha, precis)
smry.ls.1b     = tbl_ests(lm.COGls.1b,     "ls", alpha, precis)
smry.ls.1b.scl = tbl_ests(lm.COGls.1b.scl, "ls", alpha, precis)
smry.i.2b      = tbl_ests(lm.COGi.2b,      "i",  alpha, precis)
smry.i.2b.scl  = tbl_ests(lm.COGi.2b.scl,  "i",  alpha, precis)
smry.ls.2b     = tbl_ests(lm.COGls.2b,     "ls", alpha, precis)
smry.ls.2b.scl = tbl_ests(lm.COGls.2b.scl, "ls", alpha, precis)
smry.1b = combine_Bbeta(smry.i.1b, smry.i.1b.scl, smry.ls.1b, smry.ls.1b.scl)
smry.2b = combine_Bbeta(smry.i.2b, smry.i.2b.scl, smry.ls.2b, smry.ls.2b.scl)
smry.1b$prd = gsub("SOC1", '', smry.1b$prd)
smry.2b$prd = gsub("SOC2", '', smry.2b$prd)
smry.1b = smry.1b[-c(2:17),]
smry.1b[1,"prd"] = paste(levels(d$SOC1)[[1]], " [REF1]", sep = '')
smry.2b[1,"prd"] = paste(levels(d$SOC2)[[1]], " [REF2]", sep = '')
smry.b = rbind(smry.1b, smry.2b)
smry.b = smry.b[order(smry.b$prd),]
row.names(smry.b) = c(1:nrow(smry.b))
smry.b = smry.b[c(20:nrow(smry.b),17:19,1,3,10,5:7,4,8,2,16,14,12,15,9,11,13),]
smry.b

# Table 3
smry.i.4a2      = tbl_ests(lm.COGi.4a2,      "i",  alpha, precis)
smry.i.4a2.scl  = tbl_ests(lm.COGi.4a2.scl,  "i",  alpha, precis)
smry.ls.4a1     = tbl_ests(lm.COGls.4a1,     "ls", alpha, precis)
smry.ls.4a1.scl = tbl_ests(lm.COGls.4a1.scl, "ls", alpha, precis)
smry.c = combine_Bbeta(smry.i.4a2, smry.i.4a2.scl, 
	                   smry.ls.4a1, smry.ls.4a1.scl)
smry.c[1,"prd"] = "Skill4 [REF]"
smry.c$prd = gsub("SOC_Skill", "Skill", smry.c$prd)
smry.c = smry.c[c(seq(12,18,2),1,10,seq(13,19,2),6:8,5,9,3,2,4,11), ]
smry.c

# Supplemental Table 4 (SOC1 1991-weighted regression results)
smry.i.1a.wt1991      = tbl_ests(lm.COGi.1a.wt1991,      "i",  alpha, precis)
smry.i.1a.scl.wt1991  = tbl_ests(lm.COGi.1a.scl.wt1991,  "i",  alpha, precis)
smry.ls.1a.wt1991     = tbl_ests(lm.COGls.1a.wt1991,     "ls", alpha, precis)
smry.ls.1a.scl.wt1991 = tbl_ests(lm.COGls.1a.scl.wt1991, "ls", alpha, precis)
smry.a.wt1991 = combine_Bbeta(smry.i.1a.wt1991, smry.i.1a.scl.wt1991, 
	                      smry.ls.1a.wt1991, smry.ls.1a.scl.wt1991)
smry.a.wt1991$prd = gsub("SOC1", '', smry.a.wt1991$prd)
smry.a.wt1991[1,"prd"] = paste(levels(d.sens$SOC1)[[1]], " [REF]", sep = '')
smry.a.wt1991 = smry.a.wt1991[c(5,1,6:12,2:4),]
smry.a.wt1991

# Supplemental Table 5 (SOC1 2021-weighted regression results)
smry.i.1a.wt2021      = tbl_ests(lm.COGi.1a.wt2021,      "i",  alpha, precis)
smry.i.1a.scl.wt2021  = tbl_ests(lm.COGi.1a.scl.wt2021,  "i",  alpha, precis)
smry.ls.1a.wt2021     = tbl_ests(lm.COGls.1a.wt2021,     "ls", alpha, precis)
smry.ls.1a.scl.wt2021 = tbl_ests(lm.COGls.1a.scl.wt2021, "ls", alpha, precis)
smry.a.wt2021 = combine_Bbeta(smry.i.1a.wt2021, smry.i.1a.scl.wt2021, 
	                      smry.ls.1a.wt2021, smry.ls.1a.scl.wt2021)
smry.a.wt2021$prd = gsub("SOC1", '', smry.a.wt2021$prd)
smry.a.wt2021[1,"prd"] = paste(levels(d.sens$SOC1)[[1]], " [REF]", sep = '')
smry.a.wt2021 = smry.a.wt2021[c(5,1,6:12,2:4),]
smry.a.wt2021


# ------------------------------------------------------------------------------
# Write out to csv files
# ------------------------------------------------------------------------------

rd = "Path/To/Your/Preferred/Directory/"

# Main MS
write.csv(smry0.a, paste(rd, "T1.csv", sep = ''), row.names = F)
write.csv(smry.a, paste(rd,  "T2.csv", sep = ''), row.names = F)
write.csv(smry.c, paste(rd,  "T3.csv", sep = ''), row.names = F)

# Supplement
write.csv(smry0.b1,  paste(rd, "ST1.csv", sep = ''), row.names = F)
write.csv(smry0.b2, paste(rd, "ST2.csv", sep = ''), row.names = F)
write.csv(smry.b, paste(rd, "ST3.csv", sep = ''), row.names = F)
write.csv(smry.a.wt1991, paste(rd, "ST4.csv", sep = ''), row.names = F)
write.csv(smry.a.wt2021, paste(rd, "ST5.csv", sep = ''), row.names = F)


# ------------------------------------------------------------------------------
# Follow-up sensitivity analyses: Prediction of individual cognitive markers
# ------------------------------------------------------------------------------

# Speed
lm.fsSPDi.2  <- lm(as.formula(mk_frm("fsSPD.i",  cov2.0)), data = d)
lm.fsSPDls.2 <- lm(as.formula(mk_frm("fsSPD.ls", cov2.0)), data = d)
summary(lm.fsSPDi.2)
summary(lm.fsSPDls.2)

# Reasoning
lm.fsRSNi.2  <- lm(as.formula(mk_frm("fsRSN.i",  cov2.0)), data = d)
lm.fsRSNls.2 <- lm(as.formula(mk_frm("fsRSN.ls", cov2.0)), data = d)
summary(lm.fsRSNi.2)
summary(lm.fsRSNls.2)

# Visual memory
lm.fsMVISi.2  <- lm(as.formula(mk_frm("fsMVIS.i",  cov2.0)), data = d)
lm.fsMVISls.2 <- lm(as.formula(mk_frm("fsMVIS.ls", cov2.0)), data = d)
summary(lm.fsMVISi.2)
summary(lm.fsMVISls.2)


